*Cloud plot for fields*

* Load the k-means ambition-type data, discarding whatever is in memory.
use "Data\kmeans ambition types.dta", clear

* Keep only observations for whom the ambition measure is observed.
drop if wage_growth_ambition == .

* Keep only couple-years in which both partners are observed.
* A numeric copy of koen is averaged within couple_id x aar and only groups
* whose average is exactly 1.5 survive.
* NOTE(review): this presumably relies on koen being coded 1/2, so that 1.5
* means one partner of each code is present -- confirm against the codebook.
tempvar koen_num koen_avg
generate `koen_num' = koen
destring `koen_num', replace
bysort couple_id aar: egen `koen_avg' = mean(`koen_num')
drop if `koen_avg' != 1.5
drop `koen_num' `koen_avg'

* Restore person-year order for the steps that follow.
sort pnr aar

***STATUS***
count
*observations 70,233,782
***

* Attach the field-of-study variables to the person-year sample.
*
* keep(master match) drops person-years that exist only in the full-population
* field file. Such rows are not part of the analysis sample, and every
* master-only variable (including final_educ and hfaudd) is missing on them.
* Because Stata evaluates missing == missing as true, leaving them in would
* make them pass the later "final_educ==hfaudd" filter and feed their
* educ_level_field / educ_level_field_num values into the per-person
* mode/mean of the final field.
* nogenerate replaces the former "drop _merge".
merge 1:1 pnr aar using "Data\post_secondary_fields_full_population.dta", ///
    keepusing(educ_level_field educ_level_field_num) ///
    keep(master match) nogenerate

**Make map
* Build person-level descriptors of the final education (final_educ):
*   final_educ_level_3  mean of educ_eika            over the person's matching rows
*   final_educ_level_4  mean of fined                over the person's matching rows
*   final_field_num     mean of educ_level_field_num over the person's matching rows
*   final_field         most frequent educ_level_field over the matching rows
*                       (ties resolved with maxmode)
* A "matching row" is a person-year in which hfaudd equals final_educ.
* NOTE(review): hfaudd is presumably the education held in that year -- confirm.
*
* The !missing(final_educ) guard is required because Stata treats
* missing == missing as true: without it, rows where both final_educ and
* hfaudd are missing would count as matches and contaminate the statistics.
sort pnr aar

gen temp=educ_eika if final_educ==hfaudd & !missing(final_educ)
by pnr: egen final_educ_level_3=mean(temp)
drop temp

gen temp=fined if final_educ==hfaudd & !missing(final_educ)
by pnr: egen final_educ_level_4=mean(temp)
drop temp

gen temp=educ_level_field_num if final_educ==hfaudd & !missing(final_educ)
by pnr: egen final_field_num=mean(temp)
drop temp

gen temp=educ_level_field if final_educ==hfaudd & !missing(final_educ)
by pnr: egen final_field=mode(temp), maxmode
drop temp

* Override for a fixed list of education codes: each four-digit stem below
* combined with the suffixes 81-85 (e.g. 110981 ... 110985). For these codes
* the level and field variables are set to 1 / "Primary" regardless of what
* the matching rows gave.
foreach stem in 1109 1110 1107 1008 1023 1123 1009 1022 1010 {
    forvalues i=81(1)85 {
        local code `stem'`i'
        replace final_educ_level_3=1 if final_educ==`code'
        replace final_educ_level_4=1 if final_educ==`code'
        replace final_field_num=1 if final_educ==`code'
        replace final_field="Primary" if final_educ==`code'
    }
}


* Restrict to the 2018 cross-section and discard observations whose final
* education code is missing or equal to 1.
drop if aar != 2018
drop if final_educ == 1 | final_educ == .
sort final_educ

* Education-code-level summaries of the person-level variables.
foreach k in 3 4 {
    by final_educ: egen final_educ_level_group_`k' = mean(final_educ_level_`k')
}
by final_educ: egen final_field_num_group = mean(final_field_num)
by final_educ: egen final_field_group = mode(final_field), maxmode

* Mean standardized starting wage and growth, plus the number of 2018
* observations, per education code.
by final_educ: egen level_mean_s = mean(wage_start_mean_ambition_s)
by final_educ: egen growth_mean_s = mean(wage_growth_ambition_s)
by final_educ: egen educ_count = count(final_educ)

* Reduce to one row per education code, taking the first value in each group.
* The egen results above are constant within final_educ.
* NOTE(review): the ambition_type_k_*_s variables are also taken with
* (first); this is only well defined if they do not vary within final_educ
* -- confirm.
local per_educ level_mean_s growth_mean_s educ_count ///
    final_educ_level_group_3 final_educ_level_group_4 ///
    ambition_type_k_3_s ambition_type_k_4_s ambition_type_k_5_s ///
    final_field_num_group final_field_group
collapse (first) `per_educ', by(final_educ)

* String version of the education code, used to pick out individual programmes.
gen educ_code = string(final_educ)


*Fig 1a
* Growth against starting wage, one marker per education code with at least
* 10 observations; marker style follows final_educ_level_group_4 (1-4).
twoway ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & final_educ_level_group_4==1, mcolor(navy) msymbol(square_hollow)) ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & final_educ_level_group_4==2, mcolor(maroon) msymbol(circle_hollow)) ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & final_educ_level_group_4==3, mcolor(dkorange) msymbol(smdiamond_hollow)) ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & final_educ_level_group_4==4, mcolor(teal) msymbol(diamond_hollow)) ///
    , ytitle("growth (standardized)") xtitle("starting wage (standardized)") ///
    graphregion(fcolor(white)) ///
    legend(order(1 "Primary" 2 "Secondary" 3 "Bachelor" 4 "Master & PhD") position(6) cols(2) rows(2))

*Fig 1b
* Same cloud as Fig 1a, but marker style follows the four-cluster ambition
* type (ambition_type_k_4_s). The legend lists the plots in the order
* 1, 4, 3, 2 so that the entries read low/low, high/low, low/high, high/high.
twoway ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & ambition_type_k_4_s==1, mcolor(teal) msymbol(diamond_hollow)) ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & ambition_type_k_4_s==2, mcolor(maroon) msymbol(circle_hollow)) ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & ambition_type_k_4_s==3, mcolor(dkorange) msymbol(triangle_hollow)) ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & ambition_type_k_4_s==4, mcolor(navy) msymbol(square_hollow)) ///
    , ytitle("growth (standardized)") xtitle("starting wage (standardized)") ///
    graphregion(fcolor(white)) ///
    legend(order(1 "low w{sub:0}, low g" 4 "high w{sub:0}, low g" 3 "low w{sub:0}, high g" 2 "high w{sub:0}, high g") position(6) cols(2) rows(2))


*Fig 1 - fields ver4
* Same cloud again, with marker style following the modal field of each
* education code (final_field_group). Plot 3 (Business) is listed last in
* the legend.
twoway ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & final_field_group=="Primary", mcolor(gs12%50) msymbol(square_hollow)) ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & final_field_group=="Secondary", mcolor(gs12%50) msymbol(circle_hollow)) ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & final_field_group=="Business", mcolor(lavender) msymbol(smtriangle_hollow)) ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & final_field_group=="Education and Humanities", mcolor(teal) msymbol(triangle_hollow)) ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & final_field_group=="Health and Welfare", mcolor(dkorange) msymbol(diamond_hollow)) ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & final_field_group=="Social Science", mcolor(maroon) msymbol(smsquare_hollow)) ///
    (scatter growth_mean_s level_mean_s if educ_count>=10 & final_field_group=="STEM", mcolor(navy) msymbol(smdiamond_hollow)) ///
    , ytitle("growth (standardized)") xtitle("starting wage (standardized)") ///
    graphregion(fcolor(white)) ///
    legend(order(1 "Primary" 2 "Secondary" 4 "Humanities" 5 "Health" 6 "Social Science" 7 "STEM" 3 "Business") position(6) cols(4) rows(2))



*example plot

* Order the education codes by field and, within field, from largest to
* smallest, to help pick out the programmes to label.
gsort final_field_group -educ_count

* Readable labels for the hand-picked programmes shown in the example plots.
* Each entry is "<educ_code> <label>"; the text after /// is a comment giving
* the field the original author assigned to the programme.
* The final entry, 7085 Law, is Social Science.
* NOTE(review): "Medicin" may be intended as "Medicine"; left unchanged
* because it is the text printed on the figure.
gen program_name = ""
foreach entry in ///
    "5444 Preschool teacher" /// Health
    "5166 Nurse"             /// Health
    "7170 Medicin"           /// Health
    "1198 Ordinary HS"       /// Secondary
    "4283 Carpenter"         /// Secondary
    "4981 Office clerk"      /// Secondary
    "5098 Business HS"       /// Secondary
    "4986 Bank advisor"      /// Secondary
    "5440 Teacher"           /// Humanities
    "5470 Architect"         /// STEM (not one of the biggest)
    "111084 10th grade"      /// Primary
    "5703 Business grad"     /// Business
    "5700 Business bach"     /// Business
    "7085 Law" {
    * Split the entry into the code (first word) and the label (the rest).
    gettoken code txt : entry
    local txt = trim("`txt'")
    replace program_name = "`txt'" if educ_code == "`code'"
}


*plot for fields ver 1
* Labelled example programmes, with marker style by field.
* Each entry is "<educ_code> <msymbol> <mcolor> <mlabposition>". The entries
* must stay in this order: the legend refers to the plots by position
* (1 Primary, 2 Secondary, 9 Humanities, 3 Health, 14 Social Science,
* 13 STEM, 4 Business).
local pts
foreach spec in ///
    "111084 square_hollow gs12%50 12"   ///
    "4981 circle_hollow gs12%50 3"      ///
    "5444 diamond_hollow dkorange 5"    ///
    "5703 smtriangle_hollow lavender 3" ///
    "1198 circle_hollow gs12%50 6"      ///
    "4283 circle_hollow gs12%50 1"      ///
    "5098 circle_hollow gs12%50 1"      ///
    "4986 circle_hollow gs12%50 3"      ///
    "5440 triangle_hollow teal 3"       ///
    "5166 diamond_hollow dkorange 12"   ///
    "5700 smtriangle_hollow lavender 1" ///
    "7170 diamond_hollow dkorange 1"    ///
    "5470 smdiamond_hollow navy 3"      ///
    "7085 smsquare_hollow maroon 12" {
    local code : word 1 of `spec'
    local sym  : word 2 of `spec'
    local clr  : word 3 of `spec'
    local pos  : word 4 of `spec'
    * Append one labelled scatter layer for this programme.
    local pts `pts' (scatter growth_mean_s level_mean_s if educ_code == "`code'", msymbol(`sym') mcolor(`clr') mlab(program_name) mlabc(black) mlabposition(`pos'))
}

twoway `pts', ///
    xtitle("starting wage (standardized)") ytitle("growth (standardized)") ///
    legend(order(1 "Primary" 2 "Secondary" 9 "Humanities" 3 "Health" 14 "Social Science" 13 "STEM" 4 "Business") position(6) cols(4) rows(2)) ///
    graphregion(fcolor(white)) ///
    xscale(range(-2 6)) xlabel(-2(2)6) yscale(range(-5 6)) ylabel(-5(5)5)


*educ
* Labelled example programmes, with marker style by education level.
* Each entry is "<educ_code> <msymbol> <mcolor> <mlabposition>". The first
* four entries supply the legend keys (1 Primary, 2 Secondary, 3 Bachelor,
* 4 Master & PhD), so the order must be preserved.
local pts
foreach spec in ///
    "111084 square_hollow navy 12"     ///
    "4981 circle_hollow maroon 3"      ///
    "5444 smdiamond_hollow dkorange 5" ///
    "5703 diamond_hollow teal 3"       ///
    "1198 circle_hollow maroon 6"      ///
    "4283 circle_hollow maroon 1"      ///
    "5098 circle_hollow maroon 1"      ///
    "4986 circle_hollow maroon 3"      ///
    "5440 smdiamond_hollow dkorange 3" ///
    "5166 smdiamond_hollow dkorange 12" ///
    "5700 smdiamond_hollow dkorange 1" ///
    "7170 diamond_hollow teal 1"       ///
    "5470 diamond_hollow teal 3"       ///
    "7085 diamond_hollow teal 12" {
    local code : word 1 of `spec'
    local sym  : word 2 of `spec'
    local clr  : word 3 of `spec'
    local pos  : word 4 of `spec'
    * Append one labelled scatter layer for this programme.
    local pts `pts' (scatter growth_mean_s level_mean_s if educ_code == "`code'", msymbol(`sym') mcolor(`clr') mlab(program_name) mlabc(black) mlabposition(`pos'))
}

twoway `pts', ///
    xtitle("starting wage (standardized)") ytitle("growth (standardized)") ///
    legend(order(1 "Primary" 2 "Secondary" 3 "Bachelor" 4 "Master & PhD") position(6) cols(2) rows(2)) ///
    graphregion(fcolor(white)) ///
    xscale(range(-2 6)) xlabel(-2(2)6) yscale(range(-5 6)) ylabel(-5(5)5)

*ambition
* Labelled example programmes, with marker style by ambition type.
* Each entry is "<educ_code> <msymbol> <mcolor> <mlabposition>". The first
* four entries supply the legend keys (1 low/low, 2 high/low, 3 low/high,
* 4 high/high), so the order must be preserved.
local pts
foreach spec in ///
    "5444 diamond_hollow teal 5"         ///
    "5440 square_hollow navy 3"          ///
    "4981 triangle_hollow dkorange 3"    ///
    "5703 circle_hollow maroon 3"        ///
    "1198 diamond_hollow teal 6"         ///
    "5166 square_hollow navy 12"         ///
    "4283 square_hollow navy 1"          ///
    "5470 square_hollow navy 3"          ///
    "5098 triangle_hollow dkorange 1"    ///
    "111084 triangle_hollow dkorange 12" ///
    "4986 triangle_hollow dkorange 3"    ///
    "5700 circle_hollow maroon 1"        ///
    "7170 circle_hollow maroon 1"        ///
    "7085 circle_hollow maroon 12" {
    local code : word 1 of `spec'
    local sym  : word 2 of `spec'
    local clr  : word 3 of `spec'
    local pos  : word 4 of `spec'
    * Append one labelled scatter layer for this programme.
    local pts `pts' (scatter growth_mean_s level_mean_s if educ_code == "`code'", msymbol(`sym') mcolor(`clr') mlab(program_name) mlabc(black) mlabposition(`pos'))
}

twoway `pts', ///
    xtitle("starting wage (standardized)") ytitle("growth (standardized)") ///
    legend(order(1 "low w{sub:0}, low g" 2 "high w{sub:0}, low g" 3 "low w{sub:0}, high g" 4 "high w{sub:0}, high g") position(6) cols(2) rows(2)) ///
    graphregion(fcolor(white)) ///
    xscale(range(-2 6)) xlabel(-2(2)6) yscale(range(-5 6)) ylabel(-5(5)5)